In [1]:
import itertools as it

import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

Permutation and Combination

Permutation


In [2]:
# Ordered arrangements of r=2 items drawn from the 3-item pool (3P2).
[*it.permutations((1, 2, 3), 2)]


Out[2]:
[(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]

In [3]:
# Count the 2-permutations of 3 items without materialising the list: 3!/(3-2)! = 6.
sum(1 for _ in it.permutations([1, 2, 3], 2))


Out[3]:
6

In [4]:
# Pool with a repeated element (this is NOT sampling "with replacement").
# permutations() treats items as distinct by position, so the two "2"s
# produce duplicate strings: 3! = 6 results, only 3 of them unique.
# For arrangements with replacement use it.product(pool, repeat=r) instead.
["".join(arrangement) for arrangement in it.permutations("122")]


Out[4]:
['122', '122', '212', '221', '212', '221']

In [5]:
# Number of strings built from every positional permutation of "122"
# (duplicates included).
len(list(map("".join, it.permutations("122"))))


Out[5]:
6

Combination


In [6]:
# Unordered selections of 2 items out of 3 (3C2); order within a tuple is irrelevant.
[*it.combinations((1, 2, 3), 2)]


Out[6]:
[(1, 2), (1, 3), (2, 3)]

In [7]:
# Count the 2-combinations of 3 items: 3! / (2! * 1!) = 3.
sum(1 for _ in it.combinations([1, 2, 3], 2))


Out[7]:
3

In [8]:
# With replacement: an item may be picked more than once, e.g. (1, 1).
[*it.combinations_with_replacement((1, 2, 3), 2)]


Out[8]:
[(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]

In [9]:
# Count the 2-combinations with replacement of 3 items: C(3 + 2 - 1, 2) = 6.
sum(1 for _ in it.combinations_with_replacement([1, 2, 3], 2))


Out[9]:
6

Measures of Central Tendency

Arithmetic Mean


In [10]:
# Arithmetic mean: sum of the observations divided by their count.
np.asarray([5, 2, 4, 3, 6]).mean()


Out[10]:
4.0

Weighted Mean


In [11]:
# Weighted mean: sum(w_i * x_i) / sum(w_i).
np.average(
    a=[5, 2, 4, 3, 6],
    weights=[1, 2, 1, 3, 4],
)


Out[11]:
4.181818181818182

Harmonic Mean


In [12]:
# Harmonic mean: n / sum(1 / x_i).
# `stats` comes from the notebook's top import cell (`from scipy import stats`).
stats.hmean([5, 2, 4, 3, 6])


Out[12]:
3.4482758620689657

Geometric Mean


In [13]:
# Geometric mean: n-th root of the product of the n observations.
stats.gmean(np.asarray([5, 2, 4, 3, 6]))


Out[13]:
3.7279192731913513

Median


In [14]:
# Even number of observations: the median is the mean of the two middle
# values, (10 + 24) / 2, and is unaffected by the outlier 456.
np.median(np.asarray([5, 10, 24, 456]))


Out[14]:
17.0

Mode


In [15]:
# Most frequent value and how many times it occurs (1 appears three times).
# Uses the explicitly imported `stats` rather than `sp.stats`, which is only
# reachable once scipy.stats has been imported somewhere else.
stats.mode([5, 4, 21, 1, 4, 2, 5, 1, 1])


Out[15]:
ModeResult(mode=array([1]), count=array([3]))

Measures of Dispersion

Range


In [16]:
# One sample shared by all three statistics instead of repeating the literal.
range_sample = [50, 6, 5, 8]

# Minimum
print(np.min(range_sample))

# Maximum
print(np.max(range_sample))

# Range = Maximum - Minimum (np.ptp stands for "peak to peak")
np.ptp(range_sample)


5
50
Out[16]:
45

Variance


In [17]:
# ddof = "delta degrees of freedom": the variance divisor is N - ddof.
#   ddof=0 -> divide by N      (population variance)
#   ddof=1 -> divide by N - 1  (sample variance)
np.var([1, 1, 10], ddof=0)


Out[17]:
18.0

Standard Deviation


In [18]:
# Standard deviation = square root of the variance.
# ddof=0 is NumPy's default, spelled out here: population form, divisor N.
np.std([20, 1, 5], ddof=0)


Out[18]:
8.178562764256865

z-score


In [19]:
# z-score: (x - mean) / std for each observation.
# ddof=0 is the SciPy default, spelled out here (population std in the denominator).
stats.zscore([50, 10, 20], ddof=0)


Out[19]:
array([ 1.37281295, -0.98058068, -0.39223227])

Quantiles


In [20]:
# Default probabilities are 0.25 | 0.50 | 0.75.
# mquantiles defaults to alphap = betap = 0.4 plotting positions, so the
# quartiles can differ from those reported by np.percentile / pandas describe().
stats.mstats.mquantiles([5, 2, 4, 3, 6])


Out[20]:
array([2.7, 4. , 5.3])

InterQuartile Range


In [21]:
# Interquartile range: 75th percentile minus 25th percentile (5 - 3 here).
stats.iqr([5, 2, 4, 3, 6])


Out[21]:
2.0

Five-number summary


In [22]:
# describe() reports count, mean and std alongside the five-number summary
# (min, 25%, 50%, 75%, max).
summary_sample = pd.Series([5, 2, 4, 3, 6])
summary_sample.describe()


Out[22]:
count    5.000000
mean     4.000000
std      1.581139
min      2.000000
25%      3.000000
50%      4.000000
75%      5.000000
max      6.000000
dtype: float64

Measures of Shape

Skewness


In [23]:
# Positive skewness: the long tail is on the right (right skewed).
stats.mstats.skew([5, 2, 350, 112, 22, 1000])


Out[23]:
masked_array(data=1.39788443,
             mask=False,
       fill_value=1e+20)

In [24]:
# Negative skewness: the long tail is on the left (left skewed).
stats.mstats.skew([-80, -52, 3, 2])


Out[24]:
masked_array(data=-0.22189074,
             mask=False,
       fill_value=1e+20)

Kurtosis


In [25]:
# Default is Fisher's definition (excess kurtosis): a normal distribution scores 0,
# so a positive value indicates heavier tails than normal.
stats.mstats.kurtosis([210, 55, 10, 20, 33, 4])


Out[25]:
0.8096975919315055

In [ ]: